#  ██████╗ ██╗  ██╗██╗   ██╗██╗      ██████╗ ██████╗ ██╗  ██╗███████╗██████╗ ███████╗
#  ██╔══██╗██║  ██║╚██╗ ██╔╝██║     ██╔═══██╗██╔══██╗██║  ██║██╔════╝██╔══██╗██╔════╝
#  ██████╔╝███████║ ╚████╔╝ ██║     ██║   ██║██████╔╝███████║█████╗  ██████╔╝█████╗
#  ██╔═══╝ ██╔══██║  ╚██╔╝  ██║     ██║   ██║██╔═══╝ ██╔══██║██╔══╝  ██╔══██╗██╔══╝
#  ██║     ██║  ██║   ██║   ███████╗╚██████╔╝██║     ██║  ██║███████╗██║  ██║███████╗
#  ╚═╝     ╚═╝  ╚═╝   ╚═╝   ╚══════╝ ╚═════╝ ╚═╝     ╚═╝  ╚═╝╚══════╝╚═╝  ╚═╝╚══════╝

## PHYLOPHERE: A Nextflow pipeline including a complete set
## of phylogenetic comparative tools and analyses for Phenome-Genome studies

### Github: https://github.com/nozerorma/caastools/nf-phylophere
### Author: Miguel Ramon (miguel.ramon@upf.edu)

#### File: CI-composition.Rmd

Computing Credibility Intervals using Jeffreys Method

This script computes 95% credibility intervals (CIs) for the trait of interest:


Setup and Directory Configuration

This section configures the working environment, sets directories, and loads necessary functions and libraries.

# Call the setup function from commons.R
source("./obj/commons.R")
## [DEBUG] args = cancer_traits_processed-LQ.csv | science.abn7829_data_s4.nex.tree | data_exploration | 1998 | primates | family | neoplasia_prevalence | adult_necropsy_count | neoplasia_necropsy | /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv | malignant_prevalence | LQ
## [DEBUG] seed_val = 1998
## [DEBUG] workingDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/cf/c175614870c9d79243bb3274357168
## [DEBUG] objDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/cf/c175614870c9d79243bb3274357168/obj
## [DEBUG] resultsDir = data_exploration
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## [1] "Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/cf/c175614870c9d79243bb3274357168"
## [1] "Results Directory: data_exploration"
## [DEBUG] Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/cf/c175614870c9d79243bb3274357168
## [DEBUG] Results Directory: data_exploration
## [DEBUG] data_exploration_dir = data_exploration/1.Data-exploration
## [DEBUG] species_distribution_dir = data_exploration/1.Data-exploration/1.Species_distribution
## [DEBUG] extreme_plots_dir = data_exploration/1.Data-exploration/2.Extreme_plots
## [DEBUG] phylo_distribution_dir = data_exploration/1.Data-exploration/3.Phylogenetic_distribution
## [DEBUG] ci_dir = data_exploration/1.Data-exploration/4.CI_overlaps
## [DEBUG] palettes loaded: primates=15, primates_dark=15, mammals=17
## [1] "Loading trait data from: cancer_traits_processed-LQ.csv"
## [DEBUG] trait_path = cancer_traits_processed-LQ.csv
## [DEBUG] trait_df rows = 47, cols = 19
## [DEBUG] trait_df columns: tax_id, species, common_name, family, adult_necropsy_count, neoplasia_necropsy, neoplasia_prevalence, benign_count, benign_prevalence, malignant_count, malignant_prevalence, body_mass, mass_g, log_body_mass, mature_f, mature_m, mature_age, MLS.anage, LQ
## [DEBUG] trait_df species unique = 47
## [DEBUG] tree_path = science.abn7829_data_s4.nex.tree
## [DEBUG] tree tips = 236, nodes = 235
## [DEBUG] clade_name = primates
## [DEBUG] taxon_of_interest = family
## [DEBUG] trait = neoplasia_prevalence
## [DEBUG] n_trait = neoplasia_necropsy
## [DEBUG] p_trait = adult_necropsy_count
## [DEBUG] has.p = TRUE, p missing = 0
## [DEBUG] has.n = TRUE, n missing = 0
## Warning: package 'ape' was built under R version 4.4.2
## 
## Attaching package: 'ape'
## The following object is masked from 'package:dplyr':
## 
##     where
## [DEBUG] tax_id_file = /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv
## [DEBUG] tax_id_df rows = 1290, distinct taxa = 807
## [DEBUG] has.TAX_ID = TRUE
## [DEBUG] trait_df merged with tax_id_df: rows = 47, missing tax_id = 0
## [DEBUG] normalized tax_id from merged columns, missing tax_id = 0
## [DEBUG] tree_ids rows = 40, missing tax_id = 0
## [DEBUG] common_tax_ids = 40
## [DEBUG] pruned_tree tips (TAX_ID) = 40, nodes = 39
## [DEBUG] trait_df after TAX_ID tree filter rows = 40
## [DEBUG] secondary_trait = malignant_prevalence
## [DEBUG] has.secondary = TRUE, missing = 0
## [DEBUG] branch_trait = LQ
## [DEBUG] has.branch = TRUE, missing = 0
setup_rmd()

# Debug helper (prints into HTML output)
if (is.null(getOption("phylo_debug"))) {
  options(phylo_debug = TRUE)
}
if (!exists("phylo_debug_log", envir = .GlobalEnv)) {
  phylo_debug_log <- character()
}
if (!exists("debug_log", inherits = TRUE)) {
  debug_log <- function(...) {
    msg <- sprintf(...)
    phylo_debug_log <<- c(phylo_debug_log, msg)
    cat("[DEBUG] ", msg, "\n", sep = "")
  }
}

# Load additional libraries
library(dplyr)
library(tidyr)
library(tibble)
library(ggplot2)
library(patchwork)
library(ggtext)

color_palette <- paste0(clade_name, "_palette") # Color palette for the clade
capitalized_taxon <- tools::toTitleCase(taxon_of_interest) # Capitalized taxon name
capitalized_trait <- tools::toTitleCase(gsub("_", " ", trait)) # Capitalized and deconvoluted trait name
capitalized_clade <- tools::toTitleCase(gsub("_", " ", clade_name)) # Capitalized and deconvoluted clade name

createDir(ci_dir)
## [DEBUG] createDir: created data_exploration/1.Data-exploration/4.CI_overlaps

1. Confidence Interval Computation

This section calculates 95% confidence intervals for malignant and neoplasia prevalence per species using both Wilson and Jeffreys methods.

# Compute CIs directly using DescTools and binom packages
trait_ci <- trait_df %>%
  group_by(species) %>%
  dplyr::rename(n_population = .data[[p_trait]],
                n_observation = .data[[n_trait]],
                trait = .data[[trait]]) %>%
  dplyr::mutate(
    # Jeffreys method CIs
    trait_ci_lb = binom::binom.confint(n_observation, n_population, 
                                            method = "bayes", priors = c(0.5, 0.5), 
                                            conf.level = 0.95)[, 5],
    trait_ci_ub = binom::binom.confint(n_observation, n_population, 
                                            method = "bayes", priors = c(0.5, 0.5), 
                                            conf.level = 0.95)[, 6],
    trait_ci = paste0("[", round(trait_ci_lb, 2), "-", round(trait_ci_ub, 2), "]")
  ) %>%
  ungroup()
## Warning: Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.
## ℹ Please use `all_of(var)` (or `any_of(var)`) instead of `.data[[var]]`
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Save results
write.csv(trait_ci, file.path(ci_dir, "trait_CI.csv"), 
          quote = FALSE, row.names = FALSE)

# Display preview
head(trait_ci)
## # A tibble: 6 × 22
##   species      common_name family n_population n_observation  trait benign_count
##   <chr>        <chr>       <chr>         <int>         <int>  <dbl>        <int>
## 1 Alouatta_ca… Black howl… Ateli…           32             4 0.125             3
## 2 Ateles_geof… Black-hand… Ateli…           43            13 0.302             5
## 3 Callimico_g… Goeldi's m… Cebid…           69            15 0.217             8
## 4 Callithrix_… White-fron… Cebid…           31             3 0.0968            2
## 5 Callithrix_… Common mar… Cebid…           41             1 0.0244            0
## 6 Cebuella_py… Pygmy marm… Cebid…           41             4 0.0976            2
## # ℹ 15 more variables: benign_prevalence <dbl>, malignant_count <int>,
## #   malignant_prevalence <dbl>, body_mass <dbl>, mass_g <dbl>,
## #   log_body_mass <dbl>, mature_f <dbl>, mature_m <dbl>, mature_age <dbl>,
## #   MLS.anage <dbl>, LQ <dbl>, tax_id <int>, trait_ci_lb <dbl>,
## #   trait_ci_ub <dbl>, trait_ci <chr>

3. Pairwise Comparison Preparation

This section prepares data for comparing cancer prevalence between all pairs of species.

# Helper function to create pairwise comparisons for a given method
create_pairwise_data <- function(ci_data) {
  # Define column names for lower and upper bounds  
  ci_subset <- ci_data %>%
    dplyr::select(species, n_population, 
          n_observation, trait, 
          trait_ci_lb, trait_ci_ub)
  
  # Generate all pairwise species combinations
  pairwise <- expand.grid(species1 = ci_subset$species,
                          species2 = ci_subset$species,
                          stringsAsFactors = FALSE)
  
  # Merge species data for each pair
  pairwise <- pairwise %>%
    left_join(ci_subset %>% rename_with(~paste0(.x, "1"), .cols = -species),
              by = c("species1" = "species")) %>%
    left_join(ci_subset %>% rename_with(~paste0(.x, "2"), .cols = -species),
              by = c("species2" = "species"))
  
  pairwise <- pairwise %>%
    mutate(
      trait_diff = trait1 - trait2,
      trait_overlap = ci_overlap(
        trait_ci_lb1, trait_ci_ub1, trait_ci_lb2, trait_ci_ub2
      ),
      diff_value = trait_diff,
      overlap_use = trait_overlap
    )
  
  return(pairwise)
}

# Create pairwise data
pairwise_data <- create_pairwise_data(trait_ci)

# Save results
write.csv(pairwise_data, file.path(ci_dir, "pairwise_data.csv"), 
          quote = FALSE, row.names = FALSE)

# Display preview
head(pairwise_data)
##               species1        species2 n_population1 n_observation1     trait1
## 1      Alouatta_caraya Alouatta_caraya            32              4 0.12500000
## 2     Ateles_geoffroyi Alouatta_caraya            43             13 0.30232558
## 3    Callimico_goeldii Alouatta_caraya            69             15 0.21739130
## 4 Callithrix_geoffroyi Alouatta_caraya            31              3 0.09677419
## 5   Callithrix_jacchus Alouatta_caraya            41              1 0.02439024
## 6     Cebuella_pygmaea Alouatta_caraya            41              4 0.09756098
##   trait_ci_lb1 trait_ci_ub1 n_population2 n_observation2 trait2 trait_ci_lb2
## 1 3.352372e-02   0.25257350            32              4  0.125   0.03352372
## 2 1.758696e-01   0.44262825            32              4  0.125   0.03352372
## 3 1.281707e-01   0.31916086            32              4  0.125   0.03352372
## 4 1.812308e-02   0.21626214            32              4  0.125   0.03352372
## 5 4.695278e-05   0.09147094            32              4  0.125   0.03352372
## 6 2.547171e-02   0.20034261            32              4  0.125   0.03352372
##   trait_ci_ub2  trait_diff trait_overlap  diff_value overlap_use
## 1    0.2525735  0.00000000          TRUE  0.00000000        TRUE
## 2    0.2525735  0.17732558          TRUE  0.17732558        TRUE
## 3    0.2525735  0.09239130          TRUE  0.09239130        TRUE
## 4    0.2525735 -0.02822581          TRUE -0.02822581        TRUE
## 5    0.2525735 -0.10060976          TRUE -0.10060976        TRUE
## 6    0.2525735 -0.02743902          TRUE -0.02743902        TRUE

4. Difference Matrix Creation

This section transforms pairwise data into matrix format for visualization.

# Helper function to build difference matrix
build_diff_matrix <- function(pairwise_data) {
  pairwise_data %>%
    select(species1, species2, diff_value) %>%
    pivot_wider(names_from = species2, values_from = diff_value) %>%
    column_to_rownames("species1") %>%
    as.matrix()
}

# Create matrices
matrix <- build_diff_matrix(pairwise_data)

# Save matrices
write.csv(matrix, file.path(ci_dir, "diff_matrix-CI.csv"), 
          quote = FALSE, row.names = TRUE)

# Display preview
head(matrix)
##                      Alouatta_caraya Ateles_geoffroyi Callimico_goeldii
## Alouatta_caraya           0.00000000      -0.17732558       -0.09239130
## Ateles_geoffroyi          0.17732558       0.00000000        0.08493428
## Callimico_goeldii         0.09239130      -0.08493428        0.00000000
## Callithrix_geoffroyi     -0.02822581      -0.20555139       -0.12061711
## Callithrix_jacchus       -0.10060976      -0.27793534       -0.19300106
## Cebuella_pygmaea         -0.02743902      -0.20476461       -0.11983033
##                      Callithrix_geoffroyi Callithrix_jacchus Cebuella_pygmaea
## Alouatta_caraya              0.0282258065         0.10060976     0.0274390244
## Ateles_geoffroyi             0.2055513878         0.27793534     0.2047646058
## Callimico_goeldii            0.1206171108         0.19300106     0.1198303287
## Callithrix_geoffroyi         0.0000000000         0.07238395    -0.0007867821
## Callithrix_jacchus          -0.0723839496         0.00000000    -0.0731707317
## Cebuella_pygmaea             0.0007867821         0.07317073     0.0000000000
##                      Cercopithecus_neglectus Colobus_guereza Eulemur_macaco
## Alouatta_caraya                   0.05357143     0.021907216   -0.175000000
## Ateles_geoffroyi                  0.23089701     0.199232798    0.002325581
## Callimico_goeldii                 0.14596273     0.114298521   -0.082608696
## Callithrix_geoffroyi              0.02534562    -0.006318590   -0.203225806
## Callithrix_jacchus               -0.04703833    -0.078702540   -0.275609756
## Cebuella_pygmaea                  0.02613240    -0.005531808   -0.202439024
##                      Galago_moholi Galago_senegalensis Gorilla_gorilla
## Alouatta_caraya        0.031250000         0.109848485     -0.08395522
## Ateles_geoffroyi       0.208575581         0.287174066      0.09337036
## Callimico_goeldii      0.123641304         0.202239789      0.00843608
## Callithrix_geoffroyi   0.003024194         0.081622678     -0.11218103
## Callithrix_jacchus    -0.069359756         0.009238729     -0.18456498
## Cebuella_pygmaea       0.003810976         0.082409460     -0.11139425
##                      Hylobates_lar Lemur_catta Leontopithecus_chrysomelas
## Alouatta_caraya        -0.03409091 -0.03073770                0.007352941
## Ateles_geoffroyi        0.14323467  0.14658788                0.184678523
## Callimico_goeldii       0.05830040  0.06165360                0.099744246
## Callithrix_geoffroyi   -0.06231672 -0.05896351               -0.020872865
## Callithrix_jacchus     -0.13470067 -0.13134746               -0.093256815
## Cebuella_pygmaea       -0.06152993 -0.05817673               -0.020086083
##                      Leontopithecus_rosalia Macaca_fascicularis Macaca_fuscata
## Alouatta_caraya                 0.002777778          0.07094595    0.031976744
## Ateles_geoffroyi                0.180103359          0.24827153    0.209302326
## Callimico_goeldii               0.095169082          0.16333725    0.124368049
## Callithrix_geoffroyi           -0.025448029          0.04272014    0.003750938
## Callithrix_jacchus             -0.097831978         -0.02966381   -0.068633012
## Cebuella_pygmaea               -0.024661247          0.04350692    0.004537720
##                      Macaca_nigra Macaca_silenus Mandrillus_sphinx
## Alouatta_caraya       0.097222222    0.036764706       0.036111111
## Ateles_geoffroyi      0.274547804    0.214090287       0.213436693
## Callimico_goeldii     0.189613527    0.129156010       0.128502415
## Callithrix_geoffroyi  0.068996416    0.008538899       0.007885305
## Callithrix_jacchus   -0.003387534   -0.063845050      -0.064498645
## Cebuella_pygmaea      0.069783198    0.009325681       0.008672087
##                      Mico_argentatus Microcebus_murinus Nycticebus_coucang
## Alouatta_caraya           0.07500000        -0.12061404         0.04166667
## Ateles_geoffroyi          0.25232558         0.05671155         0.21899225
## Callimico_goeldii         0.16739130        -0.02822273         0.13405797
## Callithrix_geoffroyi      0.04677419        -0.14883984         0.01344086
## Callithrix_jacchus       -0.02560976        -0.22122379        -0.05894309
## Cebuella_pygmaea          0.04756098        -0.14805306         0.01422764
##                      Nycticebus_pygmaeus Pan_troglodytes Papio_hamadryas
## Alouatta_caraya              -0.14030612     -0.04342105      0.07645631
## Ateles_geoffroyi              0.03701946      0.13390453      0.25378189
## Callimico_goeldii            -0.04791482      0.04897025      0.16884762
## Callithrix_geoffroyi         -0.16853193     -0.07164686      0.04823050
## Callithrix_jacchus           -0.24091588     -0.14403081     -0.02415345
## Cebuella_pygmaea             -0.16774515     -0.07086008      0.04901729
##                      Propithecus_coquereli Saguinus_bicolor Saguinus_imperator
## Alouatta_caraya                -0.03500000       0.08500000         0.12500000
## Ateles_geoffroyi                0.14232558       0.26232558         0.30232558
## Callimico_goeldii               0.05739130       0.17739130         0.21739130
## Callithrix_geoffroyi           -0.06322581       0.05677419         0.09677419
## Callithrix_jacchus             -0.13560976      -0.01560976         0.02439024
## Cebuella_pygmaea               -0.06243902       0.05756098         0.09756098
##                      Saguinus_midas Saguinus_oedipus Saimiri_sciureus
## Alouatta_caraya          0.12500000      -0.04664179       0.04741379
## Ateles_geoffroyi         0.30232558       0.13068379       0.22473937
## Callimico_goeldii        0.21739130       0.04574951       0.13980510
## Callithrix_geoffroyi     0.09677419      -0.07486760       0.01918799
## Callithrix_jacchus       0.02439024      -0.14725155      -0.05319596
## Cebuella_pygmaea         0.09756098      -0.07408082       0.01997477
##                      Sapajus_apella Theropithecus_gelada Trachypithecus_auratus
## Alouatta_caraya         0.009615385          0.105769231             0.08796296
## Ateles_geoffroyi        0.186940966          0.283094812             0.26528854
## Callimico_goeldii       0.102006689          0.198160535             0.18035427
## Callithrix_geoffroyi   -0.018610422          0.077543424             0.05973716
## Callithrix_jacchus     -0.090994371          0.005159475            -0.01264679
## Cebuella_pygmaea       -0.017823640          0.078330206             0.06052394
##                      Trachypithecus_cristatus Trachypithecus_francoisi
## Alouatta_caraya                    0.08928571             -0.008333333
## Ateles_geoffroyi                   0.26661130              0.168992248
## Callimico_goeldii                  0.18167702              0.084057971
## Callithrix_geoffroyi               0.06105991             -0.036559140
## Callithrix_jacchus                -0.01132404             -0.108943089
## Cebuella_pygmaea                   0.06184669             -0.035772358
##                      Varecia_rubra Varecia_variegata
## Alouatta_caraya        -0.06144068      -0.085526316
## Ateles_geoffroyi        0.11588490       0.091799266
## Callimico_goeldii       0.03095063       0.006864989
## Callithrix_geoffroyi   -0.08966648      -0.113752122
## Callithrix_jacchus     -0.16205043      -0.186136072
## Cebuella_pygmaea       -0.08887970      -0.112965340

5. Heatmap Visualization

This section creates heatmaps to visualize pairwise differences in prevalence. Species labels are colored based on prevalence quantiles.

# Simplified function to create colored labels based on trait quantiles
create_colored_labels <- function(trait_data) {
  # Calculate quantile thresholds
  q75 <- quantile(trait_data$trait, 0.75, na.rm = TRUE)
  q25 <- quantile(trait_data$trait, 0.25, na.rm = TRUE)
  
  # Create a lookup table with species and their colors
  label_df <- trait_data %>%
    mutate(
      color = case_when(
        trait >= q75 ~ "#D73027",  # Red for top quartile
        trait <= q25 ~ "#1A9850",  # Green for bottom quartile
        TRUE ~ "#000000"           # Black for middle
      )
    ) %>%
    select(species, color)
  
  # Create named vector of colored HTML labels
  colored_labels <- setNames(
    paste0("<span style='color:", label_df$color, ";'>", label_df$species, "</span>"),
    label_df$species
  )
  
  return(colored_labels)
}

# Helper function to create legend plot
create_legend_plot <- function() {
  legend_df <- tibble(
    category = c("Top", "Bottom"),
    color = c("#D73027", "#1A9850")
  )
  
  ggplot(legend_df, aes(x = 1, y = category, color = category)) +
    geom_point(size = 0) +
    scale_color_manual(
      values = setNames(legend_df$color, legend_df$category),
      guide = guide_legend(override.aes = list(size = 5))
    ) +
    labs(color = "Trait Categories") +
    theme_void() +
    theme(
      legend.position = "right",
      legend.title = element_text(size = 12),
      legend.text = element_text(size = 10)
    )
}

# Helper function to create heatmap
create_heatmap <- function(pairwise_data, colored_labels, 
                          highlight_nonoverlap = TRUE) {
  # Ensure label mapping is stable across axes
  if (is.null(names(colored_labels))) {
    stop("colored_labels must be a named vector with species names.")
  }
  pairwise_data <- pairwise_data %>%
    mutate(
      species1 = factor(species1, levels = names(colored_labels)),
      species2 = factor(species2, levels = names(colored_labels))
    )

  p <- ggplot(pairwise_data, aes(x = species2, y = species1, fill = abs(diff_value))) +
    geom_tile(color = "black", size = 0.25) +
    geom_text(aes(label = sprintf("%.2f", abs(diff_value))), size = 3) +
    scale_fill_gradient(low = "white", high = "salmon3", name = "Difference") +
    scale_x_discrete(labels = function(x) colored_labels[x]) +
    scale_y_discrete(labels = function(y) colored_labels[y]) +
    theme_minimal() +
    theme(
      axis.text.x = element_markdown(angle = 45, hjust = 1),
      axis.text.y = element_markdown(angle = 0),
      panel.grid = element_blank(),
      legend.position = "none"
    )
  
  # Add bold border for non-overlapping CIs if requested
  if (highlight_nonoverlap) {
    p <- p + 
      geom_tile(data = filter(pairwise_data, !overlap_use), 
                fill = NA, color = "black", size = 1.5) +
      labs(
        x = "Species", y = "Species",
        title = paste("Pairwise Differences in trait: ", capitalized_trait, " with Non-overlapping CIs", sep = ""),
        caption = "Bold border: non-overlapping CIs."
      )
  } else {
    p <- p +
      labs(
        x = "Species", y = "Species",
        title = paste("Pairwise Differences in trait: ", capitalized_trait, sep = ""),
        caption = "Bold border: non-overlapping CIs."
      )
  }
  
  return(p)
}

# Create colored labels directly from trait data
my_labels_colored <- create_colored_labels(trait_ci)

# Save colored labels
write.csv(my_labels_colored, file.path(ci_dir, "my_labels_colored.csv"), 
          quote = FALSE, row.names = TRUE)

# Create legend
legend_plot <- create_legend_plot()

# Create heatmaps with non-overlapping CI highlights
p_overlap <- create_heatmap(pairwise_data, my_labels_colored, highlight_nonoverlap = TRUE) + 
  legend_plot + plot_layout(ncol = 2, widths = c(1000, 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p_flat <- create_heatmap(pairwise_data, my_labels_colored, highlight_nonoverlap = FALSE) + 
  legend_plot + plot_layout(ncol = 2, widths = c(1000, 1))

# Save heatmaps
ggsave(file.path(ci_dir, "visual_matrix_no_overlap.png"), p_overlap, 
       width = 15, height = 12, dpi = "retina")
ggsave(file.path(ci_dir, "visual_matrix_flat.png"), p_flat, 
       width = 15, height = 12, dpi = "retina")

# Display plots
p_overlap

p_flat

6. Polished Tables Preparation

This section creates readable summary tables for cancer traits and pairwise comparisons.

# Polished table for cancer traits
trait_ci_polished <- trait_ci %>%
  mutate(
    trait = round(trait, 3),
  ) %>%
  dplyr::select(species, n_population, n_observation, 
        trait, trait_ci_lb, trait_ci_ub, trait_ci) %>%
  dplyr::rename(
    "Trait" = trait,
    "Trait CI Lower Bound" = trait_ci_lb,
    "Trait CI Upper Bound" = trait_ci_ub,
    "Trait CI" = trait_ci
  )

write.csv(trait_ci_polished, 
          file.path(ci_dir, "trait_CI_polished.csv"), 
          quote = FALSE, row.names = FALSE)

# Polished pairwise table - recreate from original data with all CI columns
trait_diff <- trait_ci %>%
  dplyr::select(species, n_population , n_observation, trait,
        trait_ci_lb, trait_ci_ub, trait_ci)

# Generate all pairwise species combinations
pairwise_data_full <- expand.grid(species1 = trait_diff$species,
                                   species2 = trait_diff$species,
                                   stringsAsFactors = FALSE) %>%
  left_join(trait_diff %>% rename_with(~paste0(.x, "1"), .cols = -species),
            by = c("species1" = "species")) %>%
  left_join(trait_diff %>% rename_with(~paste0(.x, "2"), .cols = -species),
            by = c("species2" = "species"))

# Create polished pairwise table
pairwise_data_end_polished <- pairwise_data_full %>%
  dplyr::mutate(
    trait_diff = abs(trait1 - trait2),
    trait_diff = round(trait_diff, 3),
    # Format CIs for both Wilson and Jeffreys methods
    trait_CI1 = sprintf("[%.2f-%.2f]", 
                                 as.numeric(trait_ci_lb1), 
                                 as.numeric(trait_ci_ub1)),
    trait_CI2 = sprintf("[%.2f-%.2f]", 
                                 as.numeric(trait_ci_lb2), 
                                 as.numeric(trait_ci_ub2)),
    diff_value = trait_diff
  ) %>%
  dplyr::select(species1, species2, 
         trait_diff, trait_CI1, trait_CI2) %>%
  dplyr::rename(
    "Trait Difference" = trait_diff,
    "Trait CI1" = trait_CI1,        
    "Trait CI2" = trait_CI2
  )

write.csv(pairwise_data_end_polished, 
          file.path(ci_dir, "pairwise_data_end_polished.csv"), 
          quote = FALSE, row.names = FALSE)

# Display preview
head(trait_ci_polished)
## # A tibble: 6 × 7
##   species              n_population n_observation Trait `Trait CI Lower Bound`
##   <chr>                       <int>         <int> <dbl>                  <dbl>
## 1 Alouatta_caraya                32             4 0.125              0.0335   
## 2 Ateles_geoffroyi               43            13 0.302              0.176    
## 3 Callimico_goeldii              69            15 0.217              0.128    
## 4 Callithrix_geoffroyi           31             3 0.097              0.0181   
## 5 Callithrix_jacchus             41             1 0.024              0.0000470
## 6 Cebuella_pygmaea               41             4 0.098              0.0255   
## # ℹ 2 more variables: `Trait CI Upper Bound` <dbl>, `Trait CI` <chr>

Session Information

if (length(phylo_debug_log) > 0) {
  cat("### Debug log\n")
  cat(paste0("[DEBUG] ", phylo_debug_log, "\n"))
}

Debug log

[DEBUG] args = cancer_traits_processed-LQ.csv | science.abn7829_data_s4.nex.tree | data_exploration | 1998 | primates | family | neoplasia_prevalence | adult_necropsy_count | neoplasia_necropsy | /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv | malignant_prevalence | LQ [DEBUG] seed_val = 1998 [DEBUG] workingDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/cf/c175614870c9d79243bb3274357168 [DEBUG] objDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/cf/c175614870c9d79243bb3274357168/obj [DEBUG] resultsDir = data_exploration [DEBUG] Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/cf/c175614870c9d79243bb3274357168 [DEBUG] Results Directory: data_exploration [DEBUG] data_exploration_dir = data_exploration/1.Data-exploration [DEBUG] species_distribution_dir = data_exploration/1.Data-exploration/1.Species_distribution [DEBUG] extreme_plots_dir = data_exploration/1.Data-exploration/2.Extreme_plots [DEBUG] phylo_distribution_dir = data_exploration/1.Data-exploration/3.Phylogenetic_distribution [DEBUG] ci_dir = data_exploration/1.Data-exploration/4.CI_overlaps [DEBUG] palettes loaded: primates=15, primates_dark=15, mammals=17 [DEBUG] trait_path = cancer_traits_processed-LQ.csv [DEBUG] trait_df rows = 47, cols = 19 [DEBUG] trait_df columns: tax_id, species, common_name, family, adult_necropsy_count, neoplasia_necropsy, neoplasia_prevalence, benign_count, benign_prevalence, malignant_count, malignant_prevalence, body_mass, mass_g, log_body_mass, mature_f, mature_m, mature_age, MLS.anage, LQ [DEBUG] trait_df species unique = 47 [DEBUG] tree_path = science.abn7829_data_s4.nex.tree [DEBUG] tree tips = 236, nodes = 235 [DEBUG] clade_name = primates [DEBUG] taxon_of_interest = family [DEBUG] trait = neoplasia_prevalence [DEBUG] n_trait = neoplasia_necropsy [DEBUG] p_trait = adult_necropsy_count [DEBUG] has.p = TRUE, p missing = 0 [DEBUG] has.n = TRUE, n missing = 0 [DEBUG] tax_id_file = /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv [DEBUG] tax_id_df rows = 1290, distinct taxa = 807 [DEBUG] has.TAX_ID = TRUE [DEBUG] trait_df merged with tax_id_df: rows = 47, missing tax_id = 0 [DEBUG] normalized tax_id from merged columns, missing tax_id = 0 [DEBUG] tree_ids rows = 40, missing tax_id = 0 [DEBUG] common_tax_ids = 40 [DEBUG] pruned_tree tips (TAX_ID) = 40, nodes = 39 [DEBUG] trait_df after TAX_ID tree filter rows = 40 [DEBUG] secondary_trait = malignant_prevalence [DEBUG] has.secondary = TRUE, missing = 0 [DEBUG] branch_trait = LQ [DEBUG] has.branch = TRUE, missing = 0 [DEBUG] createDir: created data_exploration/1.Data-exploration/4.CI_overlaps

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-conda-linux-gnu
## Running under: TUXEDO OS
## 
## Matrix products: default
## BLAS/LAPACK: /home/miguel/micromamba/envs/caas_global_cancer/lib/libopenblasp-r0.3.28.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=es_ES.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Madrid
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggtext_0.1.2    patchwork_1.3.0 ggplot2_3.5.1   tibble_3.2.1   
## [5] ape_5.8-1       tidyr_1.3.1     dplyr_1.1.4     readr_2.1.5    
## 
## loaded via a namespace (and not attached):
##  [1] fastmatch_1.1-6         gtable_0.3.6            xfun_0.51              
##  [4] bslib_0.9.0             lattice_0.22-6          tzdb_0.5.0             
##  [7] numDeriv_2016.8-1.1     quadprog_1.5-8          vctrs_0.6.5            
## [10] tools_4.4.1             generics_0.1.3          parallel_4.4.1         
## [13] pkgconfig_2.0.3         Matrix_1.7-3            scatterplot3d_0.3-44   
## [16] lifecycle_1.0.4         binom_1.1-1.1           stringr_1.5.1          
## [19] compiler_4.4.1          farver_2.1.2            textshaping_1.0.0      
## [22] munsell_0.5.1           mnormt_2.1.1            combinat_0.0-8         
## [25] codetools_0.2-20        litedown_0.6            htmltools_0.5.8.1      
## [28] maps_3.4.2.1            sass_0.4.9              yaml_2.3.10            
## [31] pillar_1.10.1           crayon_1.5.3            jquerylib_0.1.4        
## [34] MASS_7.3-65             cachem_1.1.0            clusterGeneration_1.3.8
## [37] iterators_1.0.14        foreach_1.5.2           nlme_3.1-167           
## [40] phangorn_2.12.1         commonmark_1.9.5        tidyselect_1.2.1       
## [43] digest_0.6.37           stringi_1.8.4           purrr_1.0.4            
## [46] labeling_0.4.3          fastmap_1.2.0           grid_4.4.1             
## [49] colorspace_2.1-1        expm_1.0-0              cli_3.6.4              
## [52] magrittr_2.0.3          optimParallel_1.0-2     utf8_1.2.4             
## [55] withr_3.0.2             scales_1.3.0            DEoptim_2.2-8          
## [58] rmarkdown_2.29          igraph_2.1.4            ragg_1.3.3             
## [61] hms_1.1.3               coda_0.19-4.1           evaluate_1.0.3         
## [64] knitr_1.50              phytools_2.4-4          doParallel_1.0.17      
## [67] markdown_2.0            rlang_1.1.5             gridtext_0.1.5         
## [70] Rcpp_1.0.14             glue_1.8.0              xml2_1.3.8             
## [73] jsonlite_1.9.1          R6_2.6.1                systemfonts_1.2.1